--**********************************************************************************************************************
--*** AGGREGATION Claim Information on Patent Level ********************************************************************
--**********************************************************************************************************************

UPDATE epo_01_claims_from_ep_xml SET process_claims_2 = FALSE, process_claims_5 = FALSE WHERE use_claims IS TRUE;
UPDATE epo_02_claims_from_us_csv SET process_claims_2 = FALSE, process_claims_5 = FALSE WHERE use_claims IS TRUE;
UPDATE epo_01_claims_from_ep_xml SET claim_number_corrected = claim_number::INT WHERE claim_number_corrected is NULL and claim_number is not NULL;


DROP TABLE IF EXISTS public.epo_06_aggregated_ep_claims_stats;
CREATE TABLE public.epo_06_aggregated_ep_claims_stats (
                                        pat_no TEXT,
                                        publn_auth TEXT,
                                        publn_kind TEXT,
                                        process_count_2 INT,
                                        process_count_2_ind INT,
                                        process_count_5 INT,
                                        process_count_5_ind INT,
                                        use_count INT,
                                        use_count_ind INT,
                                        product_by_process_count_2 INT,
                                        product_by_process_count_2_ind INT,
                                        product_by_process_count_5 INT,
                                        product_by_process_count_5_ind INT,
                                        process_count_pred INT,
                                        process_count_pred_ind INT,
                                        total_count INT,
                                        total_count_ind INT,
                                        process_ratio_2 DECIMAL,
                                        process_ratio_2_ind DECIMAL,
                                        process_ratio_5 DECIMAL,
                                        process_ratio_5_ind DECIMAL,
                                        use_ratio DECIMAL,
                                        use_ratio_ind DECIMAL,
                                        product_by_process_ratio_2 DECIMAL ,
                                        product_by_process_ratio_2_ind DECIMAL ,
                                        product_by_process_ratio_5 DECIMAL ,
                                        product_by_process_ratio_5_ind DECIMAL ,
                                        process_ratio_pred DECIMAL,
                                        process_ratio_pred_ind DECIMAL,
	 									claim_lang TEXT
                                      )
;


DROP TABLE IF EXISTS public.epo_07_aggregated_us_claims_stats;
CREATE TABLE public.epo_07_aggregated_us_claims_stats (
                                        pat_no TEXT,
                                        publn_auth TEXT,
                                        publn_kind TEXT,
                                        process_count_2 INT,
                                        process_count_2_ind INT,
                                        process_count_5 INT,
                                        process_count_5_ind INT,
                                        use_count INT,
                                        use_count_ind INT,
                                        product_by_process_count_2 INT,
                                        product_by_process_count_2_ind INT,
                                        product_by_process_count_5 INT,
                                        product_by_process_count_5_ind INT,
                                        process_count_pred INT,
                                        process_count_pred_ind INT,
                                        total_count INT,
                                        total_count_ind INT,
                                        process_ratio_2 DECIMAL,
                                        process_ratio_2_ind DECIMAL,
                                        process_ratio_5 DECIMAL,
                                        process_ratio_5_ind DECIMAL,
                                        use_ratio DECIMAL,
                                        use_ratio_ind DECIMAL,
                                        product_by_process_ratio_2 DECIMAL ,
                                        product_by_process_ratio_2_ind DECIMAL ,
                                        product_by_process_ratio_5 DECIMAL ,
                                        product_by_process_ratio_5_ind DECIMAL ,
                                        process_ratio_pred DECIMAL,
                                        process_ratio_pred_ind DECIMAL,
	 									claim_lang TEXT
                                      )
;


INSERT INTO public.epo_06_aggregated_ep_claims_stats (
       pat_no,
       publn_auth,
       publn_kind,
       process_count_2 ,
       process_count_5 ,
       use_count ,
       product_by_process_count_2 ,
       product_by_process_count_5 ,
       total_count ,
       process_ratio_2 ,
       process_ratio_5 ,
       use_ratio ,
       product_by_process_ratio_2  ,
       product_by_process_ratio_5  ,
       claim_lang
       )


SELECT t1.pat_nr,
    	 t1.publn_auth,
			 t1.publn_kind,

       sum(process_claims_2::INT),
       sum(process_claims_5::INT),

       sum(use_claims::INT),

       sum(product_by_process_2::INT),
       sum(product_by_process_5::INT),

       count(*),

       sum(process_claims_2::INT)::DECIMAL / count(*)::DECIMAL,
       sum(process_claims_5::INT)::DECIMAL / count(*)::DECIMAL,

       sum(use_claims::INT)::DECIMAL / count(*)::DECIMAL,

       sum(product_by_process_2::INT)::DECIMAL / count(*)::DECIMAL,
       sum(product_by_process_5::INT)::DECIMAL / count(*)::DECIMAL,

       claim_lang

FROM public.epo_01_claims_from_ep_xml AS t1

/*Exclude empty/'canceled'/null*/
WHERE claim_text_ IS NOT NULL
  AND claim_text_ != ''
  AND claim_text_ !~ '^\s*[0-9]*\mcanceled\M\s*$'

GROUP BY  t1.publn_auth,
          t1.pat_nr,
          t1.publn_kind,
          t1.claim_lang;
          
CREATE INDEX epo_06_aggregated_ep_claims_stats_pat_no ON epo_06_aggregated_ep_claims_stats(pat_no);
CREATE INDEX epo_06_aggregated_ep_claims_stats_publn_kind ON epo_06_aggregated_ep_claims_stats(publn_kind);
CREATE INDEX epo_06_aggregated_ep_claims_stats_claim_lang ON epo_06_aggregated_ep_claims_stats(claim_lang);

-- we need to update the process counts and process ratios whenever processes and product by processes occur simultaneously (this is not possible because a product-by-process is a product rather than a process)
-- we focus on product_by_process_2 because there are not any cases where product_by_process_2 is FALSE and product_by_process_5 TRUE

UPDATE public.epo_06_aggregated_ep_claims_stats t1 SET
	process_count_2 = process_count_2 - product_by_process_count_2,  
	process_ratio_2 = (process_count_2 - product_by_process_count_2)::DECIMAL/ total_count::DECIMAL
	
	FROM (
	SELECT DISTINCT pat_nr, publn_kind, claim_lang FROM
	public.epo_01_claims_from_ep_xml t2 
	WHERE process_claims_2 IS TRUE AND product_by_process_2 IS TRUE) AS t2
	WHERE t1.pat_no = t2.pat_nr AND t1.publn_kind = t2.publn_kind AND t1.claim_lang = t2.claim_lang
	;

UPDATE public.epo_06_aggregated_ep_claims_stats t1 SET
	process_count_5 = process_count_5 - product_by_process_count_2,
	process_ratio_5 = (process_count_5 - product_by_process_count_2)::DECIMAL/ total_count::DECIMAL
	
	FROM (
	SELECT DISTINCT pat_nr, publn_kind, claim_lang FROM
	public.epo_01_claims_from_ep_xml t2 
	WHERE process_claims_5 IS TRUE AND product_by_process_2 IS TRUE) AS t2
	WHERE t1.pat_no = t2.pat_nr AND t1.publn_kind = t2.publn_kind AND t1.claim_lang = t2.claim_lang
    ;



INSERT INTO public.epo_07_aggregated_us_claims_stats (
       pat_no,
       publn_auth,
       publn_kind,
       process_count_2 ,
       process_count_5 ,
       use_count ,
       product_by_process_count_2 ,
       product_by_process_count_5 ,
       total_count ,
       process_ratio_2 ,
       process_ratio_5 ,
       use_ratio ,
       product_by_process_ratio_2  ,
       product_by_process_ratio_5  ,
       claim_lang
                                      )
SELECT 	t1.pat_no,
    		'US',
			  NULL,

       sum(process_claims_2::INT),
       sum(process_claims_5::INT),

       sum(use_claims::INT),

       sum(product_by_process_2::INT),
       sum(product_by_process_5::INT),

       count(*),

       (sum(process_claims_2::INT)::DECIMAL / count(*)::DECIMAL),
       (sum(process_claims_5::INT)::DECIMAL / count(*)::DECIMAL),

       (sum(use_claims::INT)::DECIMAL / count(*)::DECIMAL),

       (sum(product_by_process_2::INT)::DECIMAL / count(*)::DECIMAL),
       (sum(product_by_process_5::INT)::DECIMAL / count(*)::DECIMAL),

       'en'

FROM public.epo_02_claims_from_us_csv AS t1

/*Exclude empty/'canceled'/null*/
WHERE claim_text_ IS NOT NULL
  AND claim_text_ != ''
  AND claim_text_ !~ '^\s*[0-9]*\mcanceled\M\s*$'

GROUP BY  t1.pat_no
;


CREATE INDEX epo_07_aggregated_us_claims_stats_pat_no ON epo_07_aggregated_us_claims_stats(pat_no);

UPDATE public.epo_07_aggregated_us_claims_stats t1 SET
	process_count_2 = process_count_2 - product_by_process_count_2,
	process_ratio_2 = (process_count_2 - product_by_process_count_2)::DECIMAL/ total_count::DECIMAL
	
	FROM (
	SELECT DISTINCT pat_no FROM
	public.epo_02_claims_from_us_csv t2 
	WHERE process_claims_2 IS TRUE AND product_by_process_2 IS TRUE) AS t2
	WHERE t1.pat_no = t2.pat_no 
	;

UPDATE public.epo_07_aggregated_us_claims_stats t1 SET
	process_count_5 = process_count_5 - product_by_process_count_2,
	process_ratio_5 = (process_count_5 - product_by_process_count_2)::DECIMAL/ total_count::DECIMAL
	
	FROM (
	SELECT DISTINCT pat_no FROM
	public.epo_02_claims_from_us_csv t2 
	WHERE process_claims_5 IS TRUE AND product_by_process_2 IS TRUE) AS t2
	WHERE t1.pat_no = t2.pat_no 
	;





/*Verify the aggregated counts*/
--SELECT count(*)
  --FROM epo_07_aggregated_us_claims_stats
    --;
-- 8'946'258

--SELECT count(DISTINCT pat_no)
  --FROM epo_02_claims_from_us_csv
--WHERE claim_text_ IS NOT NULL
  --AND claim_text_ != ''
  --AND claim_text_ !~ '^\s*[0-9]*\mcanceled\M\s*$'
--;
-- 8'946'258


--SELECT count(*)
  --FROM epo_06_aggregated_ep_claims_stats
    --;
-- 6'329'838

--SELECT count(DISTINCT concat(publn_auth, pat_nr, publn_kind, claim_lang))
  --FROM epo_01_claims_from_ep_xml
--WHERE claim_text_ IS NOT NULL
  --AND claim_text_ != ''
  --AND claim_text_ !~ '^\s*[0-9]*\mcanceled\M\s*$'
--;
-- 6'329'838





/***** Update values for only independent claims - EPO ****************************************************************/

UPDATE epo_06_aggregated_ep_claims_stats

  SET process_count_2_ind = t2.process_count_2_ind ,
      process_count_5_ind = t2.process_count_5_ind ,

      use_count_ind = t2.use_count_ind ,

      product_by_process_count_2_ind = t2.product_by_process_count_2_ind ,
      product_by_process_count_5_ind = t2.product_by_process_count_5_ind ,

      total_count_ind = t2.total_count_ind ,

      process_ratio_2_ind = t2.process_ratio_2_ind ,
      process_ratio_5_ind = t2.process_ratio_5_ind ,

      use_ratio_ind = t2.use_ratio_ind ,

      product_by_process_ratio_2_ind = t2.product_by_process_ratio_2_ind ,
      product_by_process_ratio_5_ind = t2.product_by_process_ratio_5_ind

  FROM (SELECT publn_auth ,
               pat_nr ,
               publn_kind ,
               claim_lang ,

               sum(process_claims_2::INT) AS process_count_2_ind ,
               sum(process_claims_5::INT) AS process_count_5_ind ,

               sum(use_claims::INT) AS use_count_ind ,

               sum(product_by_process_2::INT) AS product_by_process_count_2_ind ,
               sum(product_by_process_5::INT) AS product_by_process_count_5_ind ,

               count(*) AS total_count_ind,

               sum(process_claims_2::INT)::DECIMAL / count(*)::DECIMAL AS process_ratio_2_ind ,
               sum(process_claims_5::INT)::DECIMAL / count(*)::DECIMAL AS process_ratio_5_ind ,

               sum(use_claims::INT)::DECIMAL / count(*)::DECIMAL AS use_ratio_ind ,

               sum(product_by_process_2::INT)::DECIMAL / count(*)::DECIMAL AS product_by_process_ratio_2_ind ,
               sum(product_by_process_5::INT)::DECIMAL / count(*)::DECIMAL AS product_by_process_ratio_5_ind


        FROM public.epo_01_claims_from_ep_xml

        WHERE claim_text_ IS NOT NULL
		AND depending_claim_ IS FALSE
        AND claim_text_ != ''
        AND claim_text_ !~ '^\s*[0-9]*\mcanceled\M\s*$'


        GROUP BY publn_auth ,
                 pat_nr ,
                 publn_kind ,
                 claim_lang

       ) AS t2

  WHERE epo_06_aggregated_ep_claims_stats.pat_no = t2.pat_nr
    AND epo_06_aggregated_ep_claims_stats.publn_auth = t2.publn_auth
    AND epo_06_aggregated_ep_claims_stats.publn_kind = t2.publn_kind
    AND epo_06_aggregated_ep_claims_stats.claim_lang = t2.claim_lang
;






UPDATE public.epo_06_aggregated_ep_claims_stats t1 SET
	process_count_2_ind = process_count_2_ind - product_by_process_count_2_ind,  
	process_ratio_2_ind = (process_count_2_ind - product_by_process_count_2_ind)::DECIMAL/ total_count_ind::DECIMAL
	
	FROM (
	SELECT DISTINCT pat_nr, publn_kind, claim_lang FROM
	public.epo_01_claims_from_ep_xml t2 
	WHERE process_claims_2 IS TRUE AND product_by_process_2 IS TRUE AND depending_claim_ IS FALSE) AS t2
	WHERE t1.pat_no = t2.pat_nr AND t1.publn_kind = t2.publn_kind AND t1.claim_lang = t2.claim_lang
	;

UPDATE public.epo_06_aggregated_ep_claims_stats t1 SET
	process_count_5_ind = process_count_5_ind - product_by_process_count_2_ind,
	process_ratio_5_ind = (process_count_5_ind - product_by_process_count_2_ind)::DECIMAL/ total_count_ind::DECIMAL
	
	FROM (
	SELECT DISTINCT pat_nr, publn_kind, claim_lang FROM
	public.epo_01_claims_from_ep_xml t2 
	WHERE process_claims_5 IS TRUE AND product_by_process_2 IS TRUE AND depending_claim_ IS FALSE) AS t2
	WHERE t1.pat_no = t2.pat_nr AND t1.publn_kind = t2.publn_kind AND t1.claim_lang = t2.claim_lang
    ;





/***** Update values for only independent claims - USPTO **************************************************************/

UPDATE epo_07_aggregated_us_claims_stats

  SET process_count_2_ind = t2.process_count_2_ind ,
      process_count_5_ind = t2.process_count_5_ind ,

      use_count_ind = t2.use_count_ind ,

      product_by_process_count_2_ind = t2.product_by_process_count_2_ind ,
      product_by_process_count_5_ind = t2.product_by_process_count_5_ind ,

      total_count_ind = t2.total_count_ind ,

      process_ratio_2_ind = t2.process_ratio_2_ind ,
      process_ratio_5_ind = t2.process_ratio_5_ind ,

      use_ratio_ind = t2.use_ratio_ind ,

      product_by_process_ratio_2_ind = t2.product_by_process_ratio_2_ind ,
      product_by_process_ratio_5_ind = t2.product_by_process_ratio_5_ind

  FROM (

        SELECT pat_no ,

               sum(process_claims_2::INT) AS process_count_2_ind ,
               sum(process_claims_5::INT) AS process_count_5_ind ,

               sum(use_claims::INT) AS use_count_ind ,

               sum(product_by_process_2::INT) AS product_by_process_count_2_ind ,
               sum(product_by_process_5::INT) AS product_by_process_count_5_ind ,

               count(*) AS total_count_ind,

               sum(process_claims_2::INT)::DECIMAL / count(*)::DECIMAL AS process_ratio_2_ind ,
               sum(process_claims_5::INT)::DECIMAL / count(*)::DECIMAL AS process_ratio_5_ind ,

               sum(use_claims::INT)::DECIMAL / count(*)::DECIMAL AS use_ratio_ind ,

               sum(product_by_process_2::INT)::DECIMAL / count(*)::DECIMAL AS product_by_process_ratio_2_ind ,
               sum(product_by_process_5::INT)::DECIMAL / count(*)::DECIMAL AS product_by_process_ratio_5_ind


        FROM public.epo_02_claims_from_us_csv

        WHERE claim_text_ IS NOT NULL
          AND claim_text_ != ''
          AND claim_text_ !~ '^\s*[0-9]*\mcanceled\M\s*$'
          AND depending_claim_ IS FALSE
        --AND (encode(ind_flg,'escape') = '1' OR dependencies = claim_no)

        GROUP BY  pat_no

       ) AS t2

  WHERE epo_07_aggregated_us_claims_stats.pat_no = t2.pat_no
;





UPDATE public.epo_07_aggregated_us_claims_stats t1 SET
	process_count_2_ind = process_count_2_ind - product_by_process_count_2_ind,
	process_ratio_2_ind = (process_count_2_ind - product_by_process_count_2_ind)::DECIMAL/ total_count_ind::DECIMAL
	
	FROM (
	SELECT DISTINCT pat_no FROM
	public.epo_02_claims_from_us_csv t2 
	WHERE process_claims_2 IS TRUE AND product_by_process_2 IS TRUE AND depending_claim_ IS FALSE) AS t2
	WHERE t1.pat_no = t2.pat_no 
	;

UPDATE public.epo_07_aggregated_us_claims_stats t1 SET
	process_count_5_ind = process_count_5_ind - product_by_process_count_2_ind,
	process_ratio_5_ind = (process_count_5_ind - product_by_process_count_2_ind)::DECIMAL/ total_count_ind::DECIMAL
	
	FROM (
	SELECT DISTINCT pat_no FROM
	public.epo_02_claims_from_us_csv t2 
	WHERE process_claims_5 IS TRUE AND product_by_process_2 IS TRUE AND depending_claim_ IS FALSE) AS t2
	WHERE t1.pat_no = t2.pat_no 
	;





------------------------------------------------------------
--- add data from text mining
------------------------------------------------------------
/*
UPDATE public.epo_06_aggregated_ep_claims_stats SET
       pat_no = t2.pat_nr,
       publn_auth = t2.publn_auth,
       publn_kind = t2.publn_kind,
       process_count_pred = t2.process_count_pred,
       process_ratio_pred = t2.process_ratio_pred,
       claim_lang = t2.claim_lang
       FROM (

SELECT t1.pat_nr,
    	 t1.publn_auth,
			 t1.publn_kind,

       sum(process_claim_pred::INT) AS process_count_pred,

       sum(process_claim_pred::INT)::DECIMAL / count(*)::DECIMAL AS process_ratio_pred,

       claim_lang

FROM (
SELECT t.publn_auth, t.publn_kind, t.claim_lang, t_.claims_key, t_.pat_nr, t_.claim_number, t_.claim_number_corrected, id, process_claim_pred
FROM public.epo_01_claims_from_ep_xml t
JOIN public.epo_ep_claims_prediction_results t_
ON (t.claims_key = t_.claims_key AND t.pat_nr = t_.pat_nr AND t.claim_number::INTEGER = t_.claim_number AND t.claim_number_corrected = t_.claim_number_corrected AND t.claim_id = t_.id)

/*Exclude empty/'canceled'/null*/
WHERE claim_text_ IS NOT NULL
  AND claim_text_ != ''
  AND claim_text_ !~ '^\s*[0-9]*\mcanceled\M\s*$'
  
  AND t_.publn_auth IS NULL

) AS t1

GROUP BY  t1.publn_auth,
          t1.pat_nr,
          t1.publn_kind,
          t1.claim_lang) AS t2
WHERE epo_06_aggregated_ep_claims_stats.pat_no = t2.pat_nr
    AND epo_06_aggregated_ep_claims_stats.publn_auth = t2.publn_auth
    AND epo_06_aggregated_ep_claims_stats.publn_kind = t2.publn_kind
    AND epo_06_aggregated_ep_claims_stats.claim_lang = t2.claim_lang          
          ;
          
UPDATE public.epo_06_aggregated_ep_claims_stats SET
       pat_no = t2.pat_nr,
       publn_auth = t2.publn_auth,
       publn_kind = t2.publn_kind,
       process_count_pred = t2.process_count_pred,
       process_ratio_pred = t2.process_ratio_pred,
       claim_lang = t2.claim_lang
       FROM (

SELECT t1.pat_nr,
    	 t1.publn_auth,
			 t1.publn_kind,

       sum(process_claim_pred::INT) AS process_count_pred,

       sum(process_claim_pred::INT)::DECIMAL / count(*)::DECIMAL AS process_ratio_pred,

       claim_lang

FROM (
SELECT t_.publn_auth, t_.publn_kind, t.claim_lang, t_.claims_key, t_.pat_nr, t_.claim_number, t_.claim_number_corrected, id, process_claim_pred
FROM public.epo_01_claims_from_ep_xml t
JOIN public.epo_ep_claims_prediction_results t_
ON (t.pat_nr = t_.pat_nr AND t.claim_number::INTEGER = t_.claim_number AND t.claim_number_corrected = t_.claim_number_corrected AND t.claim_lang = t_.claim_lang AND t.publn_auth = t_.publn_auth AND t.publn_kind = t_.publn_kind)

/*Exclude empty/'canceled'/null*/
WHERE claim_text_ IS NOT NULL
  AND claim_text_ != ''
  AND claim_text_ !~ '^\s*[0-9]*\mcanceled\M\s*$'
  
  AND t_.publn_auth IS NOT NULL

) AS t1

GROUP BY  t1.publn_auth,
          t1.pat_nr,
          t1.publn_kind,
          t1.claim_lang) AS t2
WHERE epo_06_aggregated_ep_claims_stats.pat_no = t2.pat_nr
    AND epo_06_aggregated_ep_claims_stats.publn_auth = t2.publn_auth
    AND epo_06_aggregated_ep_claims_stats.publn_kind = t2.publn_kind
    AND epo_06_aggregated_ep_claims_stats.claim_lang = t2.claim_lang          
;
                    
          
/***** Update values for only independent claims - EPO **************************************************************/
          
UPDATE public.epo_06_aggregated_ep_claims_stats SET
       pat_no = t2.pat_nr,
       publn_auth = t2.publn_auth,
       publn_kind = t2.publn_kind,
       process_count_pred_ind = t2.process_count_pred_ind,
       process_ratio_pred_ind = t2.process_ratio_pred_ind,
       claim_lang = t2.claim_lang
       FROM (

SELECT t1.pat_nr,
    	 t1.publn_auth,
			 t1.publn_kind,

       sum(process_claim_pred::INT) AS process_count_pred_ind,

       sum(process_claim_pred::INT)::DECIMAL / count(*)::DECIMAL AS process_ratio_pred_ind,

       claim_lang

FROM (
SELECT t.publn_auth, t.publn_kind, t.claim_lang, t_.claims_key, t_.pat_nr, t_.claim_number, t_.claim_number_corrected, id, process_claim_pred
FROM public.epo_01_claims_from_ep_xml t
JOIN public.epo_ep_claims_prediction_results t_
ON (t.claims_key = t_.claims_key AND t.pat_nr = t_.pat_nr AND t.claim_number::INTEGER = t_.claim_number AND t.claim_number_corrected = t_.claim_number_corrected AND t.claim_id = t_.id)

/*Exclude empty/'canceled'/null*/
WHERE claim_text_ IS NOT NULL
  AND claim_text_ != ''
  AND claim_text_ !~ '^\s*[0-9]*\mcanceled\M\s*$'
  
  AND t_.publn_auth IS NULL
  AND depending_claim_ IS FALSE) AS t1

GROUP BY  t1.publn_auth,
          t1.pat_nr,
          t1.publn_kind,
          t1.claim_lang) AS t2
WHERE epo_06_aggregated_ep_claims_stats.pat_no = t2.pat_nr
    AND epo_06_aggregated_ep_claims_stats.publn_auth = t2.publn_auth
    AND epo_06_aggregated_ep_claims_stats.publn_kind = t2.publn_kind
    AND epo_06_aggregated_ep_claims_stats.claim_lang = t2.claim_lang          
          ;
          
UPDATE public.epo_06_aggregated_ep_claims_stats SET
       pat_no = t2.pat_nr,
       publn_auth = t2.publn_auth,
       publn_kind = t2.publn_kind,
       process_count_pred_ind = t2.process_count_pred_ind,
       process_ratio_pred_ind = t2.process_ratio_pred_ind,
       claim_lang = t2.claim_lang
       FROM (

SELECT t1.pat_nr,
    	 t1.publn_auth,
			 t1.publn_kind,

       sum(process_claim_pred::INT) AS process_count_pred_ind,

       sum(process_claim_pred::INT)::DECIMAL / count(*)::DECIMAL AS process_ratio_pred_ind,

       claim_lang

FROM (
SELECT t_.publn_auth, t_.publn_kind, t.claim_lang, t_.claims_key, t_.pat_nr, t_.claim_number, t_.claim_number_corrected, id, process_claim_pred
FROM public.epo_01_claims_from_ep_xml t
JOIN public.epo_ep_claims_prediction_results t_
ON (t.pat_nr = t_.pat_nr AND t.claim_number::INTEGER = t_.claim_number AND t.claim_number_corrected = t_.claim_number_corrected AND t.claim_lang = t_.claim_lang AND t.publn_auth = t_.publn_auth AND t.publn_kind = t_.publn_kind)

/*Exclude empty/'canceled'/null*/
WHERE claim_text_ IS NOT NULL
  AND claim_text_ != ''
  AND claim_text_ !~ '^\s*[0-9]*\mcanceled\M\s*$'
  
  AND t_.publn_auth IS NOT NULL

  AND depending_claim_ IS FALSE) AS t1

GROUP BY  t1.publn_auth,
          t1.pat_nr,
          t1.publn_kind,
          t1.claim_lang) AS t2
WHERE epo_06_aggregated_ep_claims_stats.pat_no = t2.pat_nr
    AND epo_06_aggregated_ep_claims_stats.publn_auth = t2.publn_auth
    AND epo_06_aggregated_ep_claims_stats.publn_kind = t2.publn_kind
    AND epo_06_aggregated_ep_claims_stats.claim_lang = t2.claim_lang          
    ;
   

--------------------------------------------------------------------------------------------------------------

UPDATE public.epo_07_aggregated_us_claims_stats SET
       pat_no = t2.pat_no,
       process_count_pred = t2.process_count_pred,
       process_ratio_pred = t2.process_ratio_pred
       
       FROM (  
                                           
SELECT 	t1.pat_no,
    		
       sum(process_claim_pred::INT) AS process_count_pred,

       (sum(process_claim_pred::INT)::DECIMAL / count(*)::DECIMAL) AS process_ratio_pred

        FROM (

SELECT t_.* FROM epo_02_claims_from_us_csv t
JOIN public.epo_us_claims_prediction_results t_
ON t.pat_no = t_.pat_no AND t.claim_no = t_.claim_no
 WHERE claim_text_ IS NOT NULL
          AND claim_text_ != ''
          AND claim_text_ !~ '^\s*[0-9]*\mcanceled\M\s*$'
) AS t1

GROUP BY t1.pat_no) AS t2
WHERE epo_07_aggregated_us_claims_stats.pat_no = t2.pat_no
;


/***** Update values for only independent claims - USPTO **************************************************************/

UPDATE epo_07_aggregated_us_claims_stats 

  SET process_count_pred_ind = t2.process_count_pred_ind ,

      process_ratio_pred_ind = t2.process_ratio_pred_ind 

  FROM (

        SELECT pat_no ,

               sum(process_claim_pred::INT) AS process_count_pred_ind ,

               sum(process_claim_pred::INT)::DECIMAL / count(*)::DECIMAL AS process_ratio_pred_ind 

        FROM (SELECT t.*, t_.depending_claim_ FROM epo_us_claims_prediction_results t
        JOIN epo_02_claims_from_us_csv t_ ON t.pat_no = t_.pat_no AND t.claim_no = t_.claim_no

        WHERE depending_claim_ IS FALSE
        AND claim_text_ IS NOT NULL
        AND claim_text_ != ''
          AND claim_text_ !~ '^\s*[0-9]*\mcanceled\M\s*$'
) AS t1

        GROUP BY  pat_no

       ) AS t2

  WHERE epo_07_aggregated_us_claims_stats.pat_no = t2.pat_no
;
*/


-- add new results

UPDATE public.epo_06_aggregated_ep_claims_stats SET
       process_count_pred = t2.process_count_pred,
       process_ratio_pred = t2.process_ratio_pred
      
       FROM (

SELECT t1.pat_nr,
    	 t1.publn_auth,
			 t1.publn_kind,

       sum(process_claim_pred::INT) AS process_count_pred,

       sum(process_claim_pred::INT)::DECIMAL / count(*)::DECIMAL AS process_ratio_pred,

       claim_lang

FROM (
SELECT t.publn_auth, t.publn_kind, t.claim_lang, t_.claim_id, t_.pat_nr, t_.claim_number, t_.claim_number_corrected, process_claim_pred
FROM public.epo_01_claims_from_ep_xml t
JOIN public.epo_ep_claims_prediction_results_new t_
ON (t.claim_id = t_.claim_id AND t.pat_nr = t_.pat_nr AND t.claim_number::INTEGER = t_.claim_number AND t.claim_number_corrected = t_.claim_number_corrected)

/*Exclude empty/'canceled'/null*/
WHERE claim_text_ IS NOT NULL
  AND claim_text_ != ''
  AND claim_text_ !~ '^\s*[0-9]*\mcanceled\M\s*$'
  
  ) AS t1

GROUP BY  t1.publn_auth,
          t1.pat_nr,
          t1.publn_kind,
          t1.claim_lang) AS t2
WHERE epo_06_aggregated_ep_claims_stats.pat_no = t2.pat_nr
    AND epo_06_aggregated_ep_claims_stats.publn_auth = t2.publn_auth
    AND epo_06_aggregated_ep_claims_stats.publn_kind = t2.publn_kind
    AND epo_06_aggregated_ep_claims_stats.claim_lang = t2.claim_lang          
          ;
		  
		  
		  
UPDATE public.epo_06_aggregated_ep_claims_stats SET
       process_count_pred_ind = t2.process_count_pred_ind,
       process_ratio_pred_ind = t2.process_ratio_pred_ind
       FROM (

SELECT t1.pat_nr,
    	 t1.publn_auth,
			 t1.publn_kind,

       sum(process_claim_pred::INT) AS process_count_pred_ind,

       sum(process_claim_pred::INT)::DECIMAL / count(*)::DECIMAL AS process_ratio_pred_ind,

       claim_lang

FROM (
SELECT t.publn_auth, t.publn_kind, t.claim_lang, t_.claim_id, t_.pat_nr, t_.claim_number, t_.claim_number_corrected, process_claim_pred
FROM public.epo_01_claims_from_ep_xml t
JOIN public.epo_ep_claims_prediction_results_new t_
ON (t.claim_id = t_.claim_id AND t.pat_nr = t_.pat_nr AND t.claim_number::INTEGER = t_.claim_number AND t.claim_number_corrected = t_.claim_number_corrected)

/*Exclude empty/'canceled'/null*/
WHERE claim_text_ IS NOT NULL
  AND claim_text_ != ''
  AND claim_text_ !~ '^\s*[0-9]*\mcanceled\M\s*$'
  
  AND depending_claim_ IS FALSE) AS t1

GROUP BY  t1.publn_auth,
          t1.pat_nr,
          t1.publn_kind,
          t1.claim_lang) AS t2
WHERE epo_06_aggregated_ep_claims_stats.pat_no = t2.pat_nr
    AND epo_06_aggregated_ep_claims_stats.publn_auth = t2.publn_auth
    AND epo_06_aggregated_ep_claims_stats.publn_kind = t2.publn_kind
    AND epo_06_aggregated_ep_claims_stats.claim_lang = t2.claim_lang          
          ;		  
		  


		  
UPDATE public.epo_07_aggregated_us_claims_stats SET
       process_count_pred = t2.process_count_pred,
       process_ratio_pred = t2.process_ratio_pred
       
       FROM (  
                                           
SELECT 	t1.pat_no,
    		
       sum(process_claim_pred::INT) AS process_count_pred,

       (sum(process_claim_pred::INT)::DECIMAL / count(*)::DECIMAL) AS process_ratio_pred

        FROM (

SELECT t_.* FROM epo_02_claims_from_us_csv t
JOIN public.epo_us_claims_prediction_results_new t_
ON t.pat_no = t_.pat_no AND t.claim_no = t_.claim_no
 WHERE claim_text_ IS NOT NULL
          AND claim_text_ != ''
          AND claim_text_ !~ '^\s*[0-9]*\mcanceled\M\s*$'
) AS t1

GROUP BY t1.pat_no) AS t2
WHERE epo_07_aggregated_us_claims_stats.pat_no = t2.pat_no
;


UPDATE epo_07_aggregated_us_claims_stats 

  SET process_count_pred_ind = t2.process_count_pred_ind ,

      process_ratio_pred_ind = t2.process_ratio_pred_ind 

  FROM (

        SELECT pat_no ,

               sum(process_claim_pred::INT) AS process_count_pred_ind ,

               sum(process_claim_pred::INT)::DECIMAL / count(*)::DECIMAL AS process_ratio_pred_ind 

        FROM (SELECT t.*, t_.depending_claim_ FROM epo_us_claims_prediction_results t
        JOIN epo_02_claims_from_us_csv t_ ON t.pat_no = t_.pat_no AND t.claim_no = t_.claim_no

        WHERE depending_claim_ IS FALSE
        AND claim_text_ IS NOT NULL
        AND claim_text_ != ''
          AND claim_text_ !~ '^\s*[0-9]*\mcanceled\M\s*$'
) AS t1

        GROUP BY  pat_no

       ) AS t2

  WHERE epo_07_aggregated_us_claims_stats.pat_no = t2.pat_no
;



-------------------------------------------------------------------------------------------------------------------------
-- add classification results from first claim
-------------------------------------------------------------------------------------------------------------------------

ALTER TABLE epo_06_aggregated_ep_claims_stats 
ADD COLUMN first_process_claim_2 BOOLEAN, 
ADD COLUMN first_process_claim_5 BOOLEAN, 
ADD COLUMN first_process_claim_pred BOOLEAN,
ADD COLUMN first_use_claim BOOLEAN, 
ADD COLUMN first_product_by_process_2 BOOLEAN, ADD COLUMN first_product_by_process_5 BOOLEAN;

UPDATE epo_06_aggregated_ep_claims_stats t1
SET first_process_claim_2 = t2.process_claims_2, 
first_process_claim_5 = t2.process_claims_5, 
first_use_claim = t2.use_claims,
first_product_by_process_2 = t2.product_by_process_2, 
first_product_by_process_5 = t2.product_by_process_5
FROM 
epo_01_claims_from_ep_xml t2 
WHERE t1.pat_no = t2.pat_nr AND t1.publn_kind = t2.publn_kind AND t1.claim_lang = t2.claim_lang
AND claim_number_corrected = 1
AND claim_text_ IS NOT NULL
  AND claim_text_ != ''
  AND claim_text_ !~ '^\s*[0-9]*\mcanceled\M\s*$';
  
  
UPDATE epo_06_aggregated_ep_claims_stats t1
SET first_process_claim_pred = t2.process_claim_pred
FROM (
SELECT claim_text_, t.publn_auth, t.publn_kind, t.claim_lang, t_.claims_key, t_.pat_nr, t_.claim_number, t_.claim_number_corrected, id, process_claim_pred
FROM public.epo_01_claims_from_ep_xml t
JOIN public.epo_ep_claims_prediction_results t_
ON (t.claims_key = t_.claims_key AND t.pat_nr = t_.pat_nr AND t.claim_number::INTEGER = t_.claim_number AND t.claim_number_corrected = t_.claim_number_corrected AND t.claim_id = t_.id)

  WHERE t_.publn_auth IS NULL
  AND t.claim_number_corrected = 1

) AS t2
WHERE t1.pat_no = t2.pat_nr AND t1.publn_kind = t2.publn_kind AND t1.claim_lang = t2.claim_lang
AND claim_text_ IS NOT NULL
  AND claim_text_ != ''
  AND claim_text_ !~ '^\s*[0-9]*\mcanceled\M\s*$'
; 
 
UPDATE epo_06_aggregated_ep_claims_stats t1
SET first_process_claim_pred = t2.process_claim_pred
FROM (
SELECT claim_text_, t_.publn_auth, t_.publn_kind, t.claim_lang, t_.claims_key, t_.pat_nr, t_.claim_number, t_.claim_number_corrected, id, process_claim_pred
FROM public.epo_01_claims_from_ep_xml t
JOIN public.epo_ep_claims_prediction_results t_
ON (t.pat_nr = t_.pat_nr AND t.claim_number::INTEGER = t_.claim_number AND t.claim_number_corrected = t_.claim_number_corrected 
AND t.claim_lang = t_.claim_lang AND t.publn_auth = t_.publn_auth AND t.publn_kind = t_.publn_kind)

  WHERE t_.publn_auth IS NOT NULL
  AND t.claim_number_corrected = 1

) AS t2
WHERE t1.pat_no = t2.pat_nr AND t1.publn_kind = t2.publn_kind AND t1.claim_lang = t2.claim_lang
AND claim_text_ IS NOT NULL
  AND claim_text_ != ''
  AND claim_text_ !~ '^\s*[0-9]*\mcanceled\M\s*$'
; 
 


  
ALTER TABLE epo_07_aggregated_us_claims_stats ADD COLUMN first_process_claim_2 BOOLEAN, 
ADD COLUMN first_process_claim_5 BOOLEAN, ADD COLUMN first_use_claim BOOLEAN, ADD COLUMN first_process_claim_pred BOOLEAN,
ADD COLUMN first_product_by_process_2 BOOLEAN, ADD COLUMN first_product_by_process_5 BOOLEAN;

UPDATE epo_07_aggregated_us_claims_stats t1
SET first_process_claim_2 = t2.process_claims_2, 
first_process_claim_5 = t2.process_claims_5, 
first_use_claim = t2.use_claims,
first_product_by_process_2 = t2.product_by_process_2, 
first_product_by_process_5 = t2.product_by_process_5
FROM 
epo_02_claims_from_us_csv t2 
WHERE t1.pat_no = t2.pat_no 
AND claim_text_ IS NOT NULL
  AND claim_text_ != ''
  AND claim_text_ !~ '^\s*[0-9]*\mcanceled\M\s*$'
  AND claim_no = 1;


UPDATE epo_07_aggregated_us_claims_stats t1
SET first_process_claim_pred = t2.process_claim_pred
FROM (
SELECT claim_text_, t_.pat_no, t_.claim_no, process_claim_pred
FROM public.epo_02_claims_from_us_csv t
JOIN public.epo_us_claims_prediction_results t_
ON (t.pat_no = t_.pat_no AND t.claim_no = t_.claim_no)

  WHERE t.claim_no = 1

) AS t2
WHERE t1.pat_no = t2.pat_no 
AND claim_text_ IS NOT NULL
  AND claim_text_ != ''
  AND claim_text_ !~ '^\s*[0-9]*\mcanceled\M\s*$'
; 














--**********************************************************************************************************************
--*** Create a list of all EP and US patents ***************************************************************************
--**********************************************************************************************************************

/*Full List with all EP and US Patents from tls211*/
DROP TABLE IF EXISTS epo_08_patent_list;
CREATE TABLE epo_08_patent_list AS

SELECT  t1.publn_auth ,
        t1.publn_nr ,
        t1.publn_kind ,
        t1.appln_id ,
        publn_first_grant ,
        publn_claims ,
        publn_nr_original ,
        publn_date

  FROM patstat.tls211_pat_publn AS t1

  WHERE t1.publn_auth = 'EP' OR t1.publn_auth = 'US'
;




ALTER TABLE epo_08_patent_list ADD COLUMN appln_filing_date DATE
                             , ADD COLUMN appln_filing_year SMALLINT
                             , ADD COLUMN earliest_filing_date DATE
                             , ADD COLUMN earliest_filing_year SMALLINT
                             , ADD COLUMN docdb_family_id INT

                             , ADD COLUMN process_count_2 INT
                             , ADD COLUMN process_count_2_ind INT
                             , ADD COLUMN process_count_5 INT
                             , ADD COLUMN process_count_5_ind INT
                             , ADD COLUMN process_count_pred INT
                             , ADD COLUMN process_count_pred_ind INT

                             , ADD COLUMN use_count INT
                             , ADD COLUMN use_count_ind INT

                             , ADD COLUMN product_by_process_count_2 INT
                             , ADD COLUMN product_by_process_count_2_ind INT
                             , ADD COLUMN product_by_process_count_5 INT
                             , ADD COLUMN product_by_process_count_5_ind INT

                             , ADD COLUMN total_count INT
                             , ADD COLUMN total_count_ind INT

                             , ADD COLUMN process_ratio_2 DECIMAL
                             , ADD COLUMN process_ratio_2_ind DECIMAL
                             , ADD COLUMN process_ratio_5 DECIMAL
                             , ADD COLUMN process_ratio_5_ind DECIMAL
                             , ADD COLUMN process_ratio_pred DECIMAL
                             , ADD COLUMN process_ratio_pred_ind DECIMAL

                             , ADD COLUMN use_ratio DECIMAL
                             , ADD COLUMN use_ratio_ind DECIMAL

                             , ADD COLUMN product_by_process_ratio_2 DECIMAL
                             , ADD COLUMN product_by_process_ratio_2_ind DECIMAL
                             , ADD COLUMN product_by_process_ratio_5 DECIMAL
                             , ADD COLUMN product_by_process_ratio_5_ind DECIMAL
                             
                             , ADD COLUMN first_process_claim_2 BOOLEAN
                             , ADD COLUMN first_process_claim_5 BOOLEAN 
                             , ADD COLUMN first_use_claim BOOLEAN 
                             , ADD COLUMN first_process_claim_pred BOOLEAN
                             , ADD COLUMN first_product_by_process_2 BOOLEAN 
                             , ADD COLUMN first_product_by_process_5 BOOLEAN

                             , ADD COLUMN claim_lang TEXT

                             , ADD COLUMN title_process BOOLEAN
                             , ADD COLUMN title_use BOOLEAN

                             , ADD COLUMN abstract_process BOOLEAN
                             , ADD COLUMN abstract_use BOOLEAN
                             , ADD COLUMN abstract_process_pred BOOLEAN
                                                      
                             , ADD COLUMN claim_from_family BOOLEAN
                             , ADD COLUMN pred_from_family BOOLEAN
                             , ADD COLUMN match_publn_nr BOOLEAN
                             , ADD COLUMN match_incl_zero BOOLEAN
                             , ADD COLUMN match_publn_nr_original BOOLEAN;





CREATE INDEX epo_patent_list_docdb_family_id_index_2 ON public.epo_08_patent_list (docdb_family_id);




--Add further data from tls201
UPDATE epo_08_patent_list t1

SET   docdb_family_id = t2.docdb_family_id
    , appln_filing_date = t2.appln_filing_date
    , appln_filing_year = t2.appln_filing_year 
    , earliest_filing_date = t2.earliest_filing_date
    , earliest_filing_year = t2.earliest_filing_year

FROM patstat.tls201_appln AS t2
WHERE t1.appln_id = t2.appln_id
;








--**********************************************************************************************************************
--*** Match the Claim Data to the Patent List **************************************************************************
--**********************************************************************************************************************

/*Update the EP Patents with Claims information*/

/***** Add english claims *********************************************************************************************/
UPDATE public.epo_08_patent_list
SET   process_count_2                = t2.process_count_2
    , process_count_2_ind            = t2.process_count_2_ind
    , process_count_5                = t2.process_count_5
    , process_count_5_ind            = t2.process_count_5_ind
    , process_count_pred             = t2.process_count_pred
    , process_count_pred_ind         = t2.process_count_pred_ind

    , use_count                      = t2.use_count
    , use_count_ind                  = t2.use_count_ind

    , product_by_process_count_2     = t2.product_by_process_count_2
    , product_by_process_count_2_ind = t2.product_by_process_count_2_ind
    , product_by_process_count_5     = t2.product_by_process_count_5
    , product_by_process_count_5_ind = t2.product_by_process_count_5_ind

    , total_count                    = t2.total_count
    , total_count_ind                = t2.total_count_ind

    , process_ratio_2                = t2.process_ratio_2
    , process_ratio_2_ind            = t2.process_ratio_2_ind

    , process_ratio_5                = t2.process_ratio_5
    , process_ratio_5_ind            = t2.process_ratio_5_ind

    , process_ratio_pred             = t2.process_ratio_pred
    , process_ratio_pred_ind         = t2.process_ratio_pred_ind
    
    , use_ratio                      = t2.use_ratio
    , use_ratio_ind                  = t2.use_ratio_ind

    , product_by_process_ratio_2     = t2.product_by_process_ratio_2
    , product_by_process_ratio_2_ind = t2.product_by_process_ratio_2_ind
    , product_by_process_ratio_5     = t2.product_by_process_ratio_5
    , product_by_process_ratio_5_ind = t2.product_by_process_ratio_5_ind
    
    , first_process_claim_2          = t2.first_process_claim_2
    , first_process_claim_5          = t2.first_process_claim_5
    , first_use_claim                = t2.first_use_claim
    , first_process_claim_pred       = t2.first_process_claim_pred
    , first_product_by_process_2     = t2.first_product_by_process_2
    , first_product_by_process_5     = t2.first_product_by_process_5
    , claim_lang                     = 'en'  
    , pred_from_family 				 = FALSE
    , claim_from_family 			 = FALSE
    , match_publn_nr 				 = TRUE

  FROM epo_06_aggregated_ep_claims_stats AS t2

WHERE epo_08_patent_list.publn_auth = 'EP'
  AND epo_08_patent_list.publn_nr = t2.pat_no
  AND epo_08_patent_list.publn_kind = t2.publn_kind
  AND t2.claim_lang = 'en'
;


/* Check count of remaining patents without information */
--SELECT count(*)

--FROM epo_08_patent_list

  --WHERE publn_auth = 'EP'
    --AND epo_08_patent_list.total_count IS NOT NULL
--;
--2'800'101
--2'800'093




/***** Add german claims **********************************************************************************************/

UPDATE public.epo_08_patent_list
SET   process_count_2                = t2.process_count_2
    , process_count_2_ind            = t2.process_count_2_ind
    , process_count_5                = t2.process_count_5
    , process_count_5_ind            = t2.process_count_5_ind

    , use_count                      = t2.use_count
    , use_count_ind                  = t2.use_count_ind

    , product_by_process_count_2     = t2.product_by_process_count_2
    , product_by_process_count_2_ind = t2.product_by_process_count_2_ind
    , product_by_process_count_5     = t2.product_by_process_count_5
    , product_by_process_count_5_ind = t2.product_by_process_count_5_ind

    , total_count                    = t2.total_count
    , total_count_ind                = t2.total_count_ind

    , process_ratio_2                = t2.process_ratio_2
    , process_ratio_2_ind            = t2.process_ratio_2_ind

    , process_ratio_5                = t2.process_ratio_5
    , process_ratio_5_ind            = t2.process_ratio_5_ind

    , use_ratio                      = t2.use_ratio
    , use_ratio_ind                  = t2.use_ratio_ind

    , product_by_process_ratio_2     = t2.product_by_process_ratio_2
    , product_by_process_ratio_2_ind = t2.product_by_process_ratio_2_ind
    , product_by_process_ratio_5     = t2.product_by_process_ratio_5
    , product_by_process_ratio_5_ind = t2.product_by_process_ratio_5_ind
    
    , first_process_claim_2          = t2.first_process_claim_2
    , first_process_claim_5          = t2.first_process_claim_5
    , first_use_claim                = t2.first_use_claim
    , first_process_claim_pred       = t2.first_process_claim_pred
    , first_product_by_process_2     = t2.first_product_by_process_2
    , first_product_by_process_5     = t2.first_product_by_process_5
    , claim_lang                     = 'de'
    , pred_from_family 				 = FALSE
    , claim_from_family 			 = FALSE
    , match_publn_nr 				 = TRUE


 FROM epo_06_aggregated_ep_claims_stats AS t2

WHERE epo_08_patent_list.total_count IS NULL
  AND epo_08_patent_list.publn_auth = 'EP'
  AND epo_08_patent_list.publn_nr = t2.pat_no
  AND epo_08_patent_list.publn_kind = t2.publn_kind
  AND t2.claim_lang = 'de'
;



/* Check count of remaining patents without information */
--SELECT count(*)
  --FROM epo_08_patent_list
    --WHERE publn_auth = 'EP'
      --AND epo_08_patent_list.total_count IS NOT NULL
--;
--3'208'292
--3'208'280




/***** Add french claims **********************************************************************************************/
UPDATE public.epo_08_patent_list
SET   process_count_2                = t2.process_count_2
    , process_count_2_ind            = t2.process_count_2_ind
    , process_count_5                = t2.process_count_5
    , process_count_5_ind            = t2.process_count_5_ind

    , use_count                      = t2.use_count
    , use_count_ind                  = t2.use_count_ind

    , product_by_process_count_2     = t2.product_by_process_count_2
    , product_by_process_count_2_ind = t2.product_by_process_count_2_ind
    , product_by_process_count_5     = t2.product_by_process_count_5
    , product_by_process_count_5_ind = t2.product_by_process_count_5_ind

    , total_count                    = t2.total_count
    , total_count_ind                = t2.total_count_ind

    , process_ratio_2                = t2.process_ratio_2
    , process_ratio_2_ind            = t2.process_ratio_2_ind

    , process_ratio_5                = t2.process_ratio_5
    , process_ratio_5_ind            = t2.process_ratio_5_ind

    , use_ratio                      = t2.use_ratio
    , use_ratio_ind                  = t2.use_ratio_ind

    , product_by_process_ratio_2     = t2.product_by_process_ratio_2
    , product_by_process_ratio_2_ind = t2.product_by_process_ratio_2_ind
    , product_by_process_ratio_5     = t2.product_by_process_ratio_5
    , product_by_process_ratio_5_ind = t2.product_by_process_ratio_5_ind
    
    , first_process_claim_2          = t2.first_process_claim_2
    , first_process_claim_5          = t2.first_process_claim_5
    , first_use_claim                = t2.first_use_claim
    , first_process_claim_pred       = t2.first_process_claim_pred
    , first_product_by_process_2     = t2.first_product_by_process_2
    , first_product_by_process_5     = t2.first_product_by_process_5
    
    , claim_lang                     = 'fr'
    , pred_from_family = FALSE
    , claim_from_family = FALSE
    , match_publn_nr = TRUE

  FROM epo_06_aggregated_ep_claims_stats AS t2

WHERE epo_08_patent_list.total_count IS NULL
  AND epo_08_patent_list.publn_auth = 'EP'
  AND epo_08_patent_list.publn_nr = t2.pat_no
  AND epo_08_patent_list.publn_kind = t2.publn_kind
  AND t2.claim_lang = 'fr'
;



/* Check count of remaining patents without information */
--SELECT count(*)
  --FROM epo_08_patent_list
    --WHERE publn_auth = 'EP'
      --AND epo_08_patent_list.total_count IS NOT NULL
;
--3'334'078
--3'334'065



--SELECT count(*)
  --FROM epo_08_patent_list
    --WHERE publn_auth = 'EP'
      --AND epo_08_patent_list.total_count IS NOT NULL
  --ORDER BY random()
;
--Null:     2'589'798
--NOT Null: 3'334'065
--Total:    5'923'863






/***** Update the US Patents with Claims information ******************************************************************/
UPDATE public.epo_08_patent_list
SET   process_count_2                = t2.process_count_2
    , process_count_2_ind            = t2.process_count_2_ind
    , process_count_5                = t2.process_count_5
    , process_count_5_ind            = t2.process_count_5_ind
    , process_count_pred             = t2.process_count_pred
    , process_count_pred_ind         = t2.process_count_pred_ind
    
    , use_count                      = t2.use_count
    , use_count_ind                  = t2.use_count_ind

    , product_by_process_count_2     = t2.product_by_process_count_2
    , product_by_process_count_2_ind = t2.product_by_process_count_2_ind
    , product_by_process_count_5     = t2.product_by_process_count_5
    , product_by_process_count_5_ind = t2.product_by_process_count_5_ind

    , total_count                    = t2.total_count
    , total_count_ind                = t2.total_count_ind

    , process_ratio_2                = t2.process_ratio_2
    , process_ratio_2_ind            = t2.process_ratio_2_ind

    , process_ratio_5                = t2.process_ratio_5
    , process_ratio_5_ind            = t2.process_ratio_5_ind
    
    , process_ratio_pred             = t2.process_ratio_pred
    , process_ratio_pred_ind         = t2.process_ratio_pred_ind

    , use_ratio                      = t2.use_ratio
    , use_ratio_ind                  = t2.use_ratio_ind

    , product_by_process_ratio_2     = t2.product_by_process_ratio_2
    , product_by_process_ratio_2_ind = t2.product_by_process_ratio_2_ind
    , product_by_process_ratio_5     = t2.product_by_process_ratio_5
    , product_by_process_ratio_5_ind = t2.product_by_process_ratio_5_ind
    
    , first_process_claim_2          = t2.first_process_claim_2
    , first_process_claim_5          = t2.first_process_claim_5
    , first_use_claim                = t2.first_use_claim
    , first_process_claim_pred       = t2.first_process_claim_pred
    , first_product_by_process_2     = t2.first_product_by_process_2
    , first_product_by_process_5     = t2.first_product_by_process_5
    , claim_lang                     = 'en'
    , pred_from_family 				 = FALSE
    , claim_from_family 			 = FALSE
    , match_publn_nr 				 = TRUE

  FROM epo_07_aggregated_us_claims_stats AS t2

WHERE epo_08_patent_list.publn_auth = 'US'
  AND epo_08_patent_list.publn_nr = t2.pat_no
;




/***** Update the US Patents with Claims information by publn_nr_original *********************************************/
UPDATE public.epo_08_patent_list
SET   process_count_2                = t2.process_count_2
    , process_count_2_ind            = t2.process_count_2_ind
    , process_count_5                = t2.process_count_5
    , process_count_5_ind            = t2.process_count_5_ind
    , process_count_pred             = t2.process_count_pred
    , process_count_pred_ind         = t2.process_count_pred_ind
    
    , use_count                      = t2.use_count
    , use_count_ind                  = t2.use_count_ind

    , product_by_process_count_2     = t2.product_by_process_count_2
    , product_by_process_count_2_ind = t2.product_by_process_count_2_ind
    , product_by_process_count_5     = t2.product_by_process_count_5
    , product_by_process_count_5_ind = t2.product_by_process_count_5_ind

    , total_count                    = t2.total_count
    , total_count_ind                = t2.total_count_ind

    , process_ratio_2                = t2.process_ratio_2
    , process_ratio_2_ind            = t2.process_ratio_2_ind

    , process_ratio_5                = t2.process_ratio_5
    , process_ratio_5_ind            = t2.process_ratio_5_ind
    
    , process_ratio_pred             = t2.process_ratio_pred
    , process_ratio_pred_ind         = t2.process_ratio_pred_ind

    , use_ratio                      = t2.use_ratio
    , use_ratio_ind                  = t2.use_ratio_ind

    , product_by_process_ratio_2     = t2.product_by_process_ratio_2
    , product_by_process_ratio_2_ind = t2.product_by_process_ratio_2_ind
    , product_by_process_ratio_5     = t2.product_by_process_ratio_5
    , product_by_process_ratio_5_ind = t2.product_by_process_ratio_5_ind
    
    , first_process_claim_2          = t2.first_process_claim_2
    , first_process_claim_5          = t2.first_process_claim_5
    , first_use_claim                = t2.first_use_claim
    , first_process_claim_pred       = t2.first_process_claim_pred
    , first_product_by_process_2     = t2.first_product_by_process_2
    , first_product_by_process_5     = t2.first_product_by_process_5
    , claim_lang                     = 'en'
    , pred_from_family 				 = FALSE
    , claim_from_family 			 = FALSE
    , match_publn_nr_original 		 = TRUE

  FROM epo_07_aggregated_us_claims_stats AS t2

WHERE epo_08_patent_list.publn_auth = 'US'
  AND epo_08_patent_list.total_count IS NULL
  AND epo_08_patent_list.publn_nr_original = t2.pat_no
;




/***** Correct the zero-problem within us patent numbers **************************************************************/
UPDATE public.epo_08_patent_list
SET   process_count_2                = t2.process_count_2
    , process_count_2_ind            = t2.process_count_2_ind
    , process_count_5                = t2.process_count_5
    , process_count_5_ind            = t2.process_count_5_ind
    , process_count_pred             = t2.process_count_pred
    , process_count_pred_ind         = t2.process_count_pred_ind
    
    , use_count                      = t2.use_count
    , use_count_ind                  = t2.use_count_ind

    , product_by_process_count_2     = t2.product_by_process_count_2
    , product_by_process_count_2_ind = t2.product_by_process_count_2_ind
    , product_by_process_count_5     = t2.product_by_process_count_5
    , product_by_process_count_5_ind = t2.product_by_process_count_5_ind

    , total_count                    = t2.total_count
    , total_count_ind                = t2.total_count_ind

    , process_ratio_2                = t2.process_ratio_2
    , process_ratio_2_ind            = t2.process_ratio_2_ind

    , process_ratio_5                = t2.process_ratio_5
    , process_ratio_5_ind            = t2.process_ratio_5_ind
    
    , process_ratio_pred             = t2.process_ratio_pred
    , process_ratio_pred_ind         = t2.process_ratio_pred_ind

    , use_ratio                      = t2.use_ratio
    , use_ratio_ind                  = t2.use_ratio_ind

    , product_by_process_ratio_2     = t2.product_by_process_ratio_2
    , product_by_process_ratio_2_ind = t2.product_by_process_ratio_2_ind
    , product_by_process_ratio_5     = t2.product_by_process_ratio_5
    , product_by_process_ratio_5_ind = t2.product_by_process_ratio_5_ind
    
    , first_process_claim_2          = t2.first_process_claim_2
    , first_process_claim_5          = t2.first_process_claim_5
    , first_use_claim                = t2.first_use_claim
    , first_process_claim_pred       = t2.first_process_claim_pred
    , first_product_by_process_2     = t2.first_product_by_process_2
    , first_product_by_process_5     = t2.first_product_by_process_5

    , claim_lang                     = 'en'
    , pred_from_family 				 = FALSE
    , claim_from_family 			 = FALSE
    , match_incl_zero 				 = TRUE

  FROM epo_07_aggregated_us_claims_stats AS t2

WHERE epo_08_patent_list.publn_auth = 'US'
      AND epo_08_patent_list.total_count IS NULL
      AND epo_08_patent_list.publn_nr LIKE '20%'
      AND epo_08_patent_list.publn_nr = concat(substr(t2.pat_no, 1, 4),
                                               regexp_replace(substr(t2.pat_no, 5, length(t2.pat_no) -4), '^0', '')
                                                            )
;




/***** Update Claim data from families for EPO patents (as first priority from EP family members) *********************/
UPDATE epo_08_patent_list t1
        SET  (
                process_count_2
              , process_count_2_ind
              , process_count_5
              , process_count_5_ind
              , process_count_pred
              , process_count_pred_ind

              , use_count
              , use_count_ind

              , product_by_process_count_2
              , product_by_process_count_2_ind
              , product_by_process_count_5
              , product_by_process_count_5_ind

              , total_count
              , total_count_ind

              , process_ratio_2
              , process_ratio_2_ind
              , process_ratio_5
              , process_ratio_5_ind
              , process_ratio_pred
              , process_ratio_pred_ind

              , use_ratio
              , use_ratio_ind

              , product_by_process_ratio_2
              , product_by_process_ratio_2_ind
              , product_by_process_ratio_5
              , product_by_process_ratio_5_ind
              
              , first_process_claim_2 
              , first_process_claim_5 
              , first_use_claim 
              , first_process_claim_pred 
              , first_product_by_process_2 
              , first_product_by_process_5 

              )  = (

        SELECT  t2.process_count_2
              , t2.process_count_2_ind
              , t2.process_count_5
              , t2.process_count_5_ind
              , t2.process_count_pred
              , t2.process_count_pred_ind

              , t2.use_count
              , t2.use_count_ind

              , t2.product_by_process_count_2
              , t2.product_by_process_count_2_ind
              , t2.product_by_process_count_5
              , t2.product_by_process_count_5_ind

              , t2.total_count
              , t2.total_count_ind

              , t2.process_ratio_2
              , t2.process_ratio_2_ind
              , t2.process_ratio_5
              , t2.process_ratio_5_ind
              , t2.process_ratio_pred
              , t2.process_ratio_pred_ind

              , t2.use_ratio
              , t2.use_ratio_ind

              , t2.product_by_process_ratio_2
              , t2.product_by_process_ratio_2_ind
              , t2.product_by_process_ratio_5
              , t2.product_by_process_ratio_5_ind
              
              , t2.first_process_claim_2 
              , t2.first_process_claim_5 
              , t2.first_use_claim 
              , t2.first_process_claim_pred 
              , t2.first_product_by_process_2 
              , t2.first_product_by_process_5

        FROM epo_08_patent_list t2

        WHERE t2.publn_auth = 'EP'
          AND t1.docdb_family_id = t2.docdb_family_id
          AND t2.total_count IS NOT NULL

        ORDER BY publn_date ASC
        LIMIT  1
        )

  WHERE t1.total_count IS NULL
    AND t1.publn_auth = 'EP'
;



/***** Update Claim data from families for EPO patents (now for US family members) ************************************/
UPDATE epo_08_patent_list t1
        SET  (
                process_count_2
              , process_count_2_ind
              , process_count_5
              , process_count_5_ind
              , process_count_pred
              , process_count_pred_ind

              , use_count
              , use_count_ind

              , product_by_process_count_2
              , product_by_process_count_2_ind
              , product_by_process_count_5
              , product_by_process_count_5_ind

              , total_count
              , total_count_ind

              , process_ratio_2
              , process_ratio_2_ind
              , process_ratio_5
              , process_ratio_5_ind
              , process_ratio_pred
              , process_ratio_pred_ind

              , use_ratio
              , use_ratio_ind

              , product_by_process_ratio_2
              , product_by_process_ratio_2_ind
              , product_by_process_ratio_5
              , product_by_process_ratio_5_ind
              
              , first_process_claim_2 
              , first_process_claim_5 
              , first_use_claim 
              , first_process_claim_pred 
              , first_product_by_process_2 
              , first_product_by_process_5 

              )  = (

        SELECT  t2.process_count_2
              , t2.process_count_2_ind
              , t2.process_count_5
              , t2.process_count_5_ind
              , t2.process_count_pred
              , t2.process_count_pred_ind

              , t2.use_count
              , t2.use_count_ind

              , t2.product_by_process_count_2
              , t2.product_by_process_count_2_ind
              , t2.product_by_process_count_5
              , t2.product_by_process_count_5_ind

              , t2.total_count
              , t2.total_count_ind

              , t2.process_ratio_2
              , t2.process_ratio_2_ind
              , t2.process_ratio_5
              , t2.process_ratio_5_ind
              , t2.process_ratio_pred
              , t2.process_ratio_pred_ind

              , t2.use_ratio
              , t2.use_ratio_ind

              , t2.product_by_process_ratio_2
              , t2.product_by_process_ratio_2_ind
              , t2.product_by_process_ratio_5
              , t2.product_by_process_ratio_5_ind
              
              , t2.first_process_claim_2 
              , t2.first_process_claim_5 
              , t2.first_use_claim 
              , t2.first_process_claim_pred 
              , t2.first_product_by_process_2 
              , t2.first_product_by_process_5

        FROM epo_08_patent_list t2

        WHERE t2.publn_auth = 'US'
          AND t1.docdb_family_id = t2.docdb_family_id
          AND t2.total_count IS NOT NULL

        ORDER BY publn_date ASC
        LIMIT  1
        )

  WHERE t1.total_count IS NULL
    AND t1.publn_auth = 'EP'
;







/***** Update Claim data from families for US patents (with priority for US family members) ***************************/
UPDATE epo_08_patent_list t1
        SET  (
                process_count_2
              , process_count_2_ind
              , process_count_5
              , process_count_5_ind
              , process_count_pred
              , process_count_pred_ind

              , use_count
              , use_count_ind

              , product_by_process_count_2
              , product_by_process_count_2_ind
              , product_by_process_count_5
              , product_by_process_count_5_ind

              , total_count
              , total_count_ind

              , process_ratio_2
              , process_ratio_2_ind
              , process_ratio_5
              , process_ratio_5_ind
              , process_ratio_pred
              , process_ratio_pred_ind

              , use_ratio
              , use_ratio_ind

              , product_by_process_ratio_2
              , product_by_process_ratio_2_ind
              , product_by_process_ratio_5
              , product_by_process_ratio_5_ind
              
              , first_process_claim_2 
              , first_process_claim_5 
              , first_use_claim 
              , first_process_claim_pred 
              , first_product_by_process_2 
              , first_product_by_process_5 

              )  = (

        SELECT  t2.process_count_2
              , t2.process_count_2_ind
              , t2.process_count_5
              , t2.process_count_5_ind
              , t2.process_count_pred
              , t2.process_count_pred_ind

              , t2.use_count
              , t2.use_count_ind

              , t2.product_by_process_count_2
              , t2.product_by_process_count_2_ind
              , t2.product_by_process_count_5
              , t2.product_by_process_count_5_ind

              , t2.total_count
              , t2.total_count_ind

              , t2.process_ratio_2
              , t2.process_ratio_2_ind
              , t2.process_ratio_5
              , t2.process_ratio_5_ind
              , t2.process_ratio_pred
              , t2.process_ratio_pred_ind

              , t2.use_ratio
              , t2.use_ratio_ind

              , t2.product_by_process_ratio_2
              , t2.product_by_process_ratio_2_ind
              , t2.product_by_process_ratio_5
              , t2.product_by_process_ratio_5_ind
              
              , t2.first_process_claim_2 
              , t2.first_process_claim_5 
              , t2.first_use_claim 
              , t2.first_process_claim_pred 
              , t2.first_product_by_process_2 
              , t2.first_product_by_process_5

        FROM epo_08_patent_list t2

        WHERE t2.publn_auth = 'US'

          AND t1.docdb_family_id = t2.docdb_family_id
          AND t2.total_count IS NOT NULL

        ORDER BY publn_date ASC
        LIMIT  1
  )

  WHERE t1.total_count IS NULL
    AND t1.publn_auth = 'US'
;


/***** Update Claim data from families for US patents (now from EP family members) ************************************/
UPDATE epo_08_patent_list t1
  SET  (
                process_count_2
              , process_count_2_ind
              , process_count_5
              , process_count_5_ind
              , process_count_pred
              , process_count_pred_ind

              , use_count
              , use_count_ind

              , product_by_process_count_2
              , product_by_process_count_2_ind
              , product_by_process_count_5
              , product_by_process_count_5_ind

              , total_count
              , total_count_ind

              , process_ratio_2
              , process_ratio_2_ind
              , process_ratio_5
              , process_ratio_5_ind
              , process_ratio_pred
              , process_ratio_pred_ind

              , use_ratio
              , use_ratio_ind

              , product_by_process_ratio_2
              , product_by_process_ratio_2_ind
              , product_by_process_ratio_5
              , product_by_process_ratio_5_ind
              
              , first_process_claim_2 
              , first_process_claim_5 
              , first_use_claim 
              , first_process_claim_pred 
              , first_product_by_process_2 
              , first_product_by_process_5 

              )  = (

        SELECT  t2.process_count_2
              , t2.process_count_2_ind
              , t2.process_count_5
              , t2.process_count_5_ind
              , t2.process_count_pred
              , t2.process_count_pred_ind

              , t2.use_count
              , t2.use_count_ind

              , t2.product_by_process_count_2
              , t2.product_by_process_count_2_ind
              , t2.product_by_process_count_5
              , t2.product_by_process_count_5_ind

              , t2.total_count
              , t2.total_count_ind

              , t2.process_ratio_2
              , t2.process_ratio_2_ind
              , t2.process_ratio_5
              , t2.process_ratio_5_ind
              , t2.process_ratio_pred
              , t2.process_ratio_pred_ind

              , t2.use_ratio
              , t2.use_ratio_ind

              , t2.product_by_process_ratio_2
              , t2.product_by_process_ratio_2_ind
              , t2.product_by_process_ratio_5
              , t2.product_by_process_ratio_5_ind
              
              , t2.first_process_claim_2 
              , t2.first_process_claim_5 
              , t2.first_use_claim 
              , t2.first_process_claim_pred 
              , t2.first_product_by_process_2 
              , t2.first_product_by_process_5

        FROM epo_08_patent_list t2

        WHERE t2.publn_auth = 'EP'
          AND t1.docdb_family_id = t2.docdb_family_id
          AND t2.total_count IS NOT NULL

        ORDER BY publn_date ASC
        LIMIT  1
  )

  WHERE t1.total_count IS NULL
    AND t1.publn_auth = 'US'
;







UPDATE epo_08_patent_list
  SET claim_from_family = TRUE
WHERE total_count IS NOT NULL
  AND claim_from_family IS NOT FALSE
;








/***** Update text-mining data from families for EPO patents (missing if language != 'en') (as first priority from EP family members) *********************/
UPDATE epo_08_patent_list t1
        SET  (
                process_count_pred
              , process_count_pred_ind

              , process_ratio_pred
              , process_ratio_pred_ind

              , first_process_claim_pred 

              , pred_from_family
             
              )  = (

        SELECT  
                t2.process_count_pred
              , t2.process_count_pred_ind

              , t2.process_ratio_pred
              , t2.process_ratio_pred_ind

              , t2.first_process_claim_pred 

              , TRUE

        FROM epo_08_patent_list t2

        WHERE t2.publn_auth = 'EP'
          AND t1.docdb_family_id = t2.docdb_family_id
          AND t2.process_count_pred IS NOT NULL

        ORDER BY publn_date ASC
        LIMIT  1
        )

  WHERE t1.process_count_pred IS NULL
    AND t1.publn_auth = 'EP'
;



/***** Update text-mining data from families for EPO patents (now for US family members) ************************************/
UPDATE epo_08_patent_list t1
        SET  (
                process_count_pred
              , process_count_pred_ind

              , process_ratio_pred
              , process_ratio_pred_ind

              , first_process_claim_pred 

              , pred_from_family 
              
              )  = (

        SELECT  
                t2.process_count_pred
              , t2.process_count_pred_ind

              , t2.process_ratio_pred
              , t2.process_ratio_pred_ind

              , t2.first_process_claim_pred 

              , TRUE
              
        FROM epo_08_patent_list t2

        WHERE t2.publn_auth = 'US'
          AND t1.docdb_family_id = t2.docdb_family_id
          AND t2.process_count_pred IS NOT NULL

        ORDER BY publn_date ASC
        LIMIT  1
        )

  WHERE t1.process_count_pred IS NULL
    AND t1.publn_auth = 'EP'
;



UPDATE epo_08_patent_list
  SET pred_from_family = TRUE
WHERE total_count IS NOT NULL
  AND pred_from_family IS NOT FALSE
;
UPDATE epo_08_patent_list
  SET pred_from_family = TRUE
WHERE process_count_pred IS NOT NULL
  AND pred_from_family IS NOT FALSE
;


--**********************************************************************************************************************
--*** Match Name and Abstract Data to the Patent List directly *********************************************************
--**********************************************************************************************************************

--Updating title information
UPDATE public.epo_08_patent_list t1

SET   title_process = t2.title_process
    , title_use = t2.title_use

FROM epo_04_classfied_titles AS t2

WHERE  t1.appln_id = t2.appln_id
;


--Updating abstract information
UPDATE public.epo_08_patent_list t1

SET   abstract_process = t2.abstract_process
    , abstract_use = t2.abstract_use

FROM epo_05_classified_abstracts AS t2

WHERE  t1.appln_id = t2.appln_id
;

UPDATE public.epo_08_patent_list t1

SET abstract_process_pred = t2.process_abstr_pred

FROM epo_ep_abstracts_prediction_results_new AS t2

WHERE t1.appln_id = t2.appln_id
;



--**********************************************************************************************************************
--*** Find missing titles and abstracts in patent family ***************************************************************
--**********************************************************************************************************************

--information from abstract missing --> look into family if there is a PCT filing (appln_kind = 'W')
UPDATE epo_08_patent_list t
SET abstract_process  = t_.abstract_p ,
    abstract_use      = t_.abstract_u 

FROM (

SELECT DISTINCT ON(t1.appln_id)
  t1.appln_id ,
  t1.abstract_process ,
  t4.abstract_process AS abstract_p ,
  t4.abstract_use     AS abstract_u 

FROM epo_08_patent_list t1
JOIN patstat.tls201_appln t2 ON t1.appln_id = t2.appln_id
RIGHT OUTER JOIN patstat.tls201_appln t3 ON t2.docdb_family_id = t3.docdb_family_id
JOIN epo_05_classified_abstracts t4 ON t3.appln_id = t4.appln_id

WHERE t3.appln_kind = 'W'
  AND t1.abstract_process IS NULL

GROUP BY t1.appln_id, t1.abstract_process, t3.appln_id, t4.abstract_process, t4.abstract_use
ORDER BY t1.appln_id, t4.abstract_process 
) t_

WHERE t.appln_id = t_.appln_id
AND t.abstract_process IS NULL
AND t_.abstract_p IS NOT NULL
;



--information from abstract still missing --> look into family if there is a USPTO filing (appln_auth = 'US') ----------
UPDATE epo_08_patent_list t

SET abstract_process  = t_.abstract_p ,
    abstract_use      = t_.abstract_u 

FROM (

SELECT DISTINCT ON(t1.appln_id)
  t1.appln_id ,
  t1.abstract_process ,
  t4.abstract_process AS abstract_p ,
  t4.abstract_use     AS abstract_u 

FROM epo_08_patent_list t1
JOIN patstat.tls201_appln t2 ON t1.appln_id = t2.appln_id
RIGHT OUTER JOIN patstat.tls201_appln t3 ON t2.docdb_family_id = t3.docdb_family_id
JOIN epo_05_classified_abstracts t4 ON t3.appln_id = t4.appln_id

WHERE t4.abstract_process IS NOT NULL
  AND t3.appln_kind != 'W'
  AND t3.appln_auth = 'US'
  AND t1.abstract_process IS NULL

GROUP BY t1.appln_id, t1.abstract_process, t3.appln_id, t4.abstract_process, t4.abstract_use 
ORDER BY t1.appln_id, t4.abstract_process 
) t_

WHERE t.appln_id = t_.appln_id
AND t.abstract_process IS NULL
AND t_.abstract_p IS NOT NULL
;



--information from abstract still missing --> look into family if there is any English abstract ------------------------
UPDATE epo_08_patent_list t

SET abstract_process  = t_.abstract_p ,
    abstract_use      = t_.abstract_u 

FROM (

SELECT DISTINCT ON(t1.appln_id)
  t1.appln_id ,
  t1.abstract_process ,
  t4.abstract_process AS abstract_p ,
  t4.abstract_use     AS abstract_u 

FROM epo_08_patent_list t1
JOIN patstat.tls201_appln t2 ON t1.appln_id = t2.appln_id
RIGHT OUTER JOIN patstat.tls201_appln t3 ON t2.docdb_family_id = t3.docdb_family_id
JOIN epo_05_classified_abstracts t4 ON t3.appln_id = t4.appln_id

WHERE t4.abstract_process IS NOT NULL
  AND t3.appln_kind != 'W'
  AND t3.appln_auth != 'US'
  AND t4.appln_abstract_lg = 'en'
  AND t1.abstract_process IS NULL

GROUP BY t1.appln_id, t1.abstract_process, t3.appln_id, t4.abstract_process, t4.abstract_use 
ORDER BY t1.appln_id, t4.abstract_process 
) t_

WHERE t.appln_id = t_.appln_id
AND t.abstract_process IS NULL
AND t_.abstract_p IS NOT NULL
;


--information from abstract still missing --> look into family if there are any other abstracts in German or French ----------
UPDATE epo_08_patent_list t

SET abstract_process  = t_.abstract_p ,
    abstract_use      = t_.abstract_u 
	
FROM (

SELECT DISTINCT ON(t1.appln_id)
  t1.appln_id ,
  t1.abstract_process ,
  t4.abstract_process AS abstract_p ,
  t4.abstract_use     AS abstract_u 

FROM epo_08_patent_list t1
JOIN patstat.tls201_appln t2 ON t1.appln_id = t2.appln_id
RIGHT OUTER JOIN patstat.tls201_appln t3 ON t2.docdb_family_id = t3.docdb_family_id
JOIN epo_05_classified_abstracts t4 ON t3.appln_id = t4.appln_id

WHERE t4.abstract_process IS NOT NULL
  AND t1.abstract_process IS NULL

GROUP BY t1.appln_id, t1.abstract_process, t3.appln_id, t4.abstract_process, t4.abstract_use
ORDER BY t1.appln_id, t4.abstract_process 
) t_

WHERE t.appln_id = t_.appln_id
AND t.abstract_process IS NULL
AND t_.abstract_p IS NOT NULL
;

--text mining information from abstract missing --> look into family if there is a PCT filing (appln_kind = 'W')
UPDATE epo_08_patent_list t
SET
    abstract_process_pred = t_.abstract_p_p 

FROM (

SELECT DISTINCT ON(t1.appln_id)
  t1.appln_id ,
  t1.abstract_process_pred ,
 
  t5.process_abstr_pred AS abstract_p_p

FROM epo_08_patent_list t1
JOIN patstat.tls201_appln t2 ON t1.appln_id = t2.appln_id
RIGHT OUTER JOIN patstat.tls201_appln t3 ON t2.docdb_family_id = t3.docdb_family_id
JOIN epo_ep_abstracts_prediction_results_new t5 ON t3.appln_id = t5.appln_id

WHERE t3.appln_kind = 'W'
  AND t1.abstract_process_pred IS NULL

GROUP BY t1.appln_id, t1.abstract_process_pred, t3.appln_id, t5.process_abstr_pred
ORDER BY t1.appln_id, t5.process_abstr_pred
) t_
WHERE t.appln_id = t_.appln_id
AND t.abstract_process_pred IS NULL
AND t_.abstract_p_p IS NOT NULL
;



--text mining information from abstract still missing --> look into family if there is a USPTO filing (appln_auth = 'US') ----------
UPDATE epo_08_patent_list t
SET
    abstract_process_pred = t_.abstract_p_p 

FROM (

SELECT DISTINCT ON(t1.appln_id)
  t1.appln_id ,
  t1.abstract_process_pred ,
 
  t5.process_abstr_pred AS abstract_p_p

FROM epo_08_patent_list t1
JOIN patstat.tls201_appln t2 ON t1.appln_id = t2.appln_id
RIGHT OUTER JOIN patstat.tls201_appln t3 ON t2.docdb_family_id = t3.docdb_family_id
JOIN epo_ep_abstracts_prediction_results_new t5 ON t3.appln_id = t5.appln_id

WHERE t3.appln_kind != 'W'
   AND t3.appln_auth = 'US'
   AND t1.abstract_process_pred IS NULL

GROUP BY t1.appln_id, t1.abstract_process_pred, t3.appln_id, t5.process_abstr_pred
ORDER BY t1.appln_id, t5.process_abstr_pred 
) t_

WHERE t.appln_id = t_.appln_id
AND t.abstract_process_pred IS NULL
AND t_.abstract_p_p IS NOT NULL
;



--text mining information from abstract still missing --> look into family if there is another abstract ------------------------
UPDATE epo_08_patent_list t
SET
    abstract_process_pred = t_.abstract_p_p 

FROM (

SELECT DISTINCT ON(t1.appln_id)
  t1.appln_id ,
  t1.abstract_process_pred ,
 
  t5.process_abstr_pred AS abstract_p_p

FROM epo_08_patent_list t1
JOIN patstat.tls201_appln t2 ON t1.appln_id = t2.appln_id
RIGHT OUTER JOIN patstat.tls201_appln t3 ON t2.docdb_family_id = t3.docdb_family_id
JOIN epo_ep_abstracts_prediction_results_new t5 ON t3.appln_id = t5.appln_id

WHERE t3.appln_kind != 'W'
  AND t3.appln_auth != 'US'
  AND t1.abstract_process_pred IS NULL

GROUP BY t1.appln_id, t1.abstract_process_pred, t3.appln_id, t5.process_abstr_pred
ORDER BY t1.appln_id, t5.process_abstr_pred 
) t_

WHERE t.appln_id = t_.appln_id
AND t.abstract_process_pred IS NULL
AND t_.abstract_p_p IS NOT NULL
;



SELECT count(*)
FROM epo_08_patent_list
WHERE abstract_process IS NULL
AND publn_auth = 'EP'
;
--4'112'268







------------------------------------------------------------------------------------------------------------------------
-- DOING THE SAME FOR TITLES



--information from title missing --> look into family if there is a PCT filing (appln_kind = 'W')
UPDATE epo_08_patent_list t

SET title_process = title_p ,
    title_use     = title_u

FROM (

SELECT DISTINCT ON(t1.appln_id)

  t1.appln_id ,
  t1.title_process ,
  t4.title_process  AS title_p ,
  t4.title_use      AS title_u

FROM epo_08_patent_list t1
JOIN patstat.tls201_appln t2 ON t1.appln_id = t2.appln_id
RIGHT OUTER JOIN patstat.tls201_appln t3 ON t2.docdb_family_id = t3.docdb_family_id
JOIN epo_04_classfied_titles t4 ON t3.appln_id = t4.appln_id

WHERE t3.appln_kind = 'W' 
AND  t1.title_process IS NULL
GROUP BY t1.appln_id, t1.title_process, t3.appln_id, t4.title_process, t4.title_use
order by t1.appln_id, t4.title_process
) t_

WHERE t.appln_id = t_.appln_id
AND t.title_process IS NULL
AND t_.title_p IS NOT NULL
;


--information from title still missing --> look into family if there is a USPTO filing (appln_auth = 'US') -------------
UPDATE epo_08_patent_list t

SET title_process = title_p ,
    title_use     = title_u

FROM (

SELECT DISTINCT ON(t1.appln_id)
  t1.appln_id ,
  t1.title_process ,
  t4.title_process  AS title_p ,
  t4.title_use      AS title_u

FROM epo_08_patent_list t1
JOIN patstat.tls201_appln t2 ON t1.appln_id = t2.appln_id
RIGHT OUTER JOIN patstat.tls201_appln t3 ON t2.docdb_family_id = t3.docdb_family_id
JOIN epo_04_classfied_titles t4 ON t3.appln_id = t4.appln_id

WHERE t3.appln_kind != 'W' 
AND t3.appln_auth = 'US' 
AND t1.title_process IS NULL
GROUP BY t1.appln_id, t1.title_process, t3.appln_id, t4.title_process, t4.title_use
order by t1.appln_id, t4.title_process
) t_

WHERE t.appln_id = t_.appln_id
  AND t.title_process IS NULL
  AND t_.title_p IS NOT NULL
;



--information from title still missing --> look into family if there is any English title
UPDATE epo_08_patent_list t

SET title_process = title_p ,
    title_use     = title_u

FROM (

SELECT DISTINCT ON(t1.appln_id)
  t1.appln_id ,
  t1.title_process ,
  t4.title_process  AS title_p ,
  t4.title_use      AS title_u

FROM epo_08_patent_list t1
JOIN patstat.tls201_appln t2 ON t1.appln_id = t2.appln_id
RIGHT OUTER JOIN patstat.tls201_appln t3 ON t2.docdb_family_id = t3.docdb_family_id
JOIN epo_04_classfied_titles t4 ON t3.appln_id = t4.appln_id

WHERE t3.appln_kind != 'W' 
AND t3.appln_auth != 'US' 
AND t4.appln_title_lg = 'en' 
AND t1.title_process IS NULL
GROUP BY t1.appln_id, t1.title_process, t3.appln_id, t4.title_process, t4.title_use
order by t1.appln_id, t4.title_process
) t_

WHERE t.appln_id = t_.appln_id
AND t.title_process IS NULL
  AND t_.title_p IS NOT NULL
;



--information from title still missing --> look into family if there are any titles in German or French ----------------
UPDATE epo_08_patent_list t

SET title_process = title_p ,
    title_use =     title_u

FROM (

SELECT
  t1.appln_id ,
  t1.title_process ,
  t4.title_process  AS title_p ,
  t4.title_use      AS title_u

FROM epo_08_patent_list t1
JOIN patstat.tls201_appln t2 ON t1.appln_id = t2.appln_id
RIGHT OUTER JOIN patstat.tls201_appln t3 ON t2.docdb_family_id = t3.docdb_family_id
JOIN epo_04_classfied_titles t4 ON t3.appln_id = t4.appln_id

WHERE t4.title_process IS NOT NULL AND t1.title_process IS NULL
GROUP BY t1.appln_id, t1.title_process, t3.appln_id, t4.title_process, t4.title_use
order by t1.appln_id, t4.title_process
) t_

WHERE t.appln_id = t_.appln_id
AND t.title_process IS NULL
  AND t_.title_p IS NOT NULL
;










--**********************************************************************************************************************
--*** Check Count of Missing Claim Information *************************************************************************
--**********************************************************************************************************************


SELECT count(*)
  FROM epo_08_patent_list
 WHERE epo_08_patent_list.total_count IS NULL

     --AND publn_auth = 'EP'
;

--CLAIMS
--US Total:       15'365'440
--US NOT NULL:     9'948'279
----------------------------
--EPO Total:       5'923'863
--EPO NOT NULL:    5'543'228
----------------------------
--Total:          21'289'303
--Total NOT NULL: 15'491'507


SELECT count(*)
  FROM epo_08_patent_list
  WHERE epo_08_patent_list.title_process IS NULL
  --WHERE epo_08_patent_list.abstract_process IS NULL
    AND publn_auth IN('EP', 'US')
;
--Titles NULL:      1'027'848
--Abstracts NULL:   4'112'268



SELECT count(*)

  FROM epo_08_patent_list

 WHERE epo_08_patent_list.total_count IS NOT NULL
   AND publn_claims != 0
   AND CASE WHEN publn_auth = 'EP' THEN earliest_filing_year <= 2016
            ELSE earliest_filing_year <= 2014 END
;

--IS NULL:        680'112
--IS NOT NULL:  8'889'585




SELECT publn_auth, earlieast_filing_year, count(*)
FROM epo_08_patent_list
WHERE     total_count IS NULL
      AND publn_claims != 0
      --AND publn_auth = 'US'
      AND CASE WHEN publn_auth = 'EP' THEN earliest_filing_year <= 2016
               ELSE earliest_filing_year <= 2014 END
GROUP BY earliest_filing_year, publn_auth
ORDER BY earliest_filing_year
;


SELECT count(*)
FROM epo_08_patent_list
WHERE     total_count IS NULL
      AND publn_claims != 0
      --AND publn_auth = 'US'
      --AND earlieast_filing_year >= 1975
      AND CASE WHEN publn_auth = 'EP' THEN earliest_filing_year <=2016
               ELSE earliest_filing_year <= 2014 END
;



SELECT *
FROM epo_08_patent_list
WHERE total_count IS NULL
  AND CASE WHEN publn_auth = 'EP' THEN earliest_filing_year <= 2016
               ELSE earliest_filing_year <= 2014 END
  ORDER BY random()
LIMIT 1000
;


SELECT *
FROM epo_08_patent_list
WHERE match_incl_zero = TRUE
ORDER BY random()
;


SELECT count(*)
FROM epo_zew_unmatched
;
--ZEW Total: 63'283

SELECT count(*)
FROM epo_zew_unmatched t1
LEFT JOIN epo_08_patent_list t2 ON t1.appln_id = t2.appln_id
WHERE t2.total_count IS NULL
--ORDER BY random()
;
--ZEW NULL: 7'745



SELECT count(*)
  FROM epo_08_patent_list
  WHERE epo_08_patent_list.title_process IS NULL
;
--1'027'848
--21'289'303




--**********************************************************************************************************************
--*** Export 'raw' data for ZEW ****************************************************************************************
--**********************************************************************************************************************

DROP TABLE IF EXISTS epo_zew_export_temp;
CREATE TABLE epo_zew_export_temp AS

  SELECT  publn_auth
          , publn_nr
          , publn_nr_original
          , publn_kind
		  , publn_first_grant
          , appln_id
          , appln_filing_date
          , publn_date
          , earliest_filing_date
		  , docdb_family_id

          , process_count_2
          , process_count_2_ind
          , process_count_5
          , process_count_5_ind
          , process_count_pred
          , process_count_pred_ind

          , use_count
          , use_count_ind

          , product_by_process_count_2
          , product_by_process_count_2_ind
          , product_by_process_count_5
          , product_by_process_count_5_ind

          , total_count
          , total_count_ind

          , process_ratio_2
          , process_ratio_2_ind
          , process_ratio_5
          , process_ratio_5_ind
          , process_ratio_pred
          , process_ratio_pred_ind
          
          , first_process_claim_2 
          , first_process_claim_5 
          , first_use_claim 
          , first_process_claim_pred 
          , first_product_by_process_2 
          , first_product_by_process_5

          , use_ratio
          , use_ratio_ind

          , product_by_process_ratio_2
          , product_by_process_ratio_2_ind

          , title_process
          , title_use

          , abstract_process
          , abstract_use
          , abstract_process_pred

  FROM epo_08_patent_list
;

ALTER TABLE epo_zew_export_temp ADD COLUMN nb_citing_docdb_fam int;
UPDATE epo_zew_export_temp t1 set nb_citing_docdb_fam = t2.nb_citing_docdb_fam
FROM tls201_appln t2 where t1.appln_id = t2.appln_id;

--Export code for psql
\COPY epo_zew_export_temp TO 'C:/Users/seligerf/KOF Dropbox/01 Projects(funded)/FBI.FP.93 - EPO/02 Daten/epo_zew_export_temp.csv' DELIMITER ';' CSV HEADER;




